# -*- coding:utf-8 -*-
# @Author: shenyuyu
# @Time: 2023/6/16 9:51
# @File: qu_1.py

import requests
import re
from time import sleep
import random
from openpyxl import Workbook


def get_result(url, headers, timeout=30):
    """Download *url* with an HTTP GET request and return the response.

    Args:
        url: Address of the page to fetch.
        headers: Mapping of HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Passed
            unchanged to ``requests.get``, so a ``(connect, read)`` tuple
            is accepted as well.

    Returns:
        The response object produced by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    print("开始爬取数据！")
    # requests never times out on its own; without an explicit limit a
    # stalled connection would block the whole crawl indefinitely.
    return requests.get(url, headers=headers, timeout=timeout)


def pares_data(result, records=None):
    """Extract listing records from one page of HTML and collect them.

    Six regular expressions are run over the page, one per field. The
    n-th match of every pattern is combined into the n-th record, a dict
    with the keys "标题", "地址", "简介", "收藏", "总价" and "均价".

    Args:
        result: HTML text of a listing page.
        records: List that receives the parsed dicts. When omitted, the
            module-level list ``l`` is used.

    Returns:
        None. The parsed records are appended to ``records``.
    """
    print("开始解析数据！")
    # Raw strings keep "\d" a regex escape rather than an (invalid) string
    # escape, which newer Python versions warn about.
    titles = re.findall(r'data-is_focus="" data-sl="">(.*?)</a>', result, re.S)
    location = re.findall(
        r'data-log_index="\d+" data-el="region">(.*?)</a>.*?target="_blank">(.*?)</a>',
        result, re.S)
    introduces = re.findall(r'<span class="houseIcon"></span>(.*?)</div>', result, re.S)
    stars = re.findall(r'<span class="starIcon"></span>(.*?)</div>', result, re.S)
    price_all = re.findall(r'<span class="">(.*?)</span><i>(.*?)</i>', result, re.S)
    price = re.findall(
        r'data-price="\d+"><span>(.*?)</span></div></div></div><div class="listButtonContainer">',
        result, re.S)

    if records is None:
        # Default sink: the shared module-level list read by the script.
        records = l

    fields = (titles, location, introduces, stars, price_all, price)
    lengths = [len(matches) for matches in fields]
    if len(set(lengths)) > 1:
        # A pattern matched a different number of times than the others.
        # Indexing by title count used to raise IndexError here; instead
        # keep only the rows for which every field is present.
        # NOTE: fields are paired purely by position, so a match missing in
        # the middle of the page shifts the later rows of that field.
        print("警告：各字段匹配数量不一致 {}，仅保留前 {} 条记录！".format(lengths, min(lengths)))

    # zip() stops at the shortest list, so no index can run out of range.
    for title, (region, area), intro, star, (amount, unit), unit_price in zip(*fields):
        records.append({
            "标题": title,
            "地址": region + "-" + area,
            "简介": intro,
            "收藏": star,
            "总价": amount + unit,
            "均价": unit_price,
        })


# Shared accumulator: pares_data() appends one dict per listing to it.
l = list()

# HTTP headers sent with every page request.
# NOTE(review): the Cookie value is hard-coded; presumably it was copied from
# a browser session and may expire - confirm before relying on it.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43",
    "Cookie": "select_city=110000; lianjia_uuid=5765922f-fab9-4101-99ed-28b595f49389; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22188c1e1d6c82a0-096215f929e85-7e565479-1382400-188c1e1d6c975e%22%2C%22%24device_id%22%3A%22188c1e1d6c82a0-096215f929e85-7e565479-1382400-188c1e1d6c975e%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; lianjia_ssid=b16897a2-e403-430a-a959-f4c2ab0360d5",
}

# Crawl result pages 1 through 10, one request per page.
first_page, last_page = 1, 10
for page in range(first_page, last_page + 1):
    url = "https://bj.lianjia.com/ershoufang/pg{}/".format(page)
    print(url)
    response = get_result(url, headers)
    pares_data(response.text)
    # Wait a random 10-12 seconds before continuing with the next page.
    pause = random.randint(10, 12)
    sleep(pause)
    # Progress dump: everything collected so far.
    print(l)


print(l)

# Export the collected records to an Excel workbook: a header row followed
# by one row per record, columns A-F in the order of ``field_names``.
wb = Workbook()
active = wb.active

column_letters = "ABCDEF"
field_names = ("标题", "地址", "简介", "收藏", "总价", "均价")

# Row 1 carries the field names as column headings.
for letter, field in zip(column_letters, field_names):
    active[letter + "1"] = field

# Data rows start at row 2, directly below the headings.
for row_number, record in enumerate(l, start=2):
    for letter, field in zip(column_letters, field_names):
        active[letter + str(row_number)] = record[field]

wb.save("1.xlsx")
